## Processing
# Import and clean the trade data

# Read one sheet of a trade workbook and reshape it to one row per
# HS code and month, with one value column per year.
#
# Args:
#   path:          Path to the Excel workbook.
#   sheet_name:    Name of the sheet to read.
#   values_to_new: String used as the name of the long-format value column
#                  and as the prefix of the per-year output columns
#                  (e.g. "cifval" gives "cifval_<year>").
#
# Returns:
#   A tibble with `hs02` (numeric), `month`, and one `<values_to_new>_<year>`
#   column per year found in the sheet, restricted to April through July.
#
# The sheet must contain columns named January through December (selected
# as a contiguous range); its 2nd and 4th columns are renamed to `hs02`
# and `year`.
impclean <- function(path, sheet_name, values_to_new) {
  # The first two rows of the sheet are skipped before the header is read.
  raw <- read_excel(path, sheet = sheet_name, skip = 2)
  colnames(raw)[c(2, 4)] <- c("hs02", "year")

  # The pipeline result is the function's value. The previous version ended
  # the pipeline with a dangling `%>%` in front of `return (df)`, which
  # pipes the result into `return()` as an extra argument.
  raw %>%
    filter(!is.na(.data[["hs02"]])) %>%
    pivot_longer(
      cols = January:December,
      names_to = "month",
      values_to = values_to_new
    ) %>%
    # all_of() makes it explicit that `values_to_new` holds a column name
    # rather than being a column itself.
    select(hs02, year, month, all_of(values_to_new)) %>%
    pivot_wider(
      names_from = year,
      values_from = all_of(values_to_new),
      names_prefix = paste0(values_to_new, "_")
    ) %>%
    # Codes that cannot be parsed as numbers become NA (with a warning).
    mutate(hs02 = as.numeric(hs02)) %>%
    # month.name[4:7] is April, May, June, July.
    filter(month %in% month.name[4:7])
}


# Import values (CIF) and export values (FAS), each from its own workbook.
IM_data <- impclean(
  path = path_to_IM,
  sheet_name = "CIF Import Value",
  values_to_new = "cifval"
)
EX_data <- impclean(
  path = path_to_EX,
  sheet_name = "FAS Value",
  values_to_new = "expval"
)

# Calculated duties and CIF import values, both read from the tariff
# revenue workbook.
cald <- impclean(
  path = path_tariff_rev,
  sheet_name = "Calculated Duties",
  values_to_new = "cald"
)
cifval <- impclean(
  path = path_tariff_rev,
  sheet_name = "CIF Import Value",
  values_to_new = "cifval"
)


# Build clean tables of the available import/export price indexes (some
# series are missing) for harmonized (HS2) codes, HTS chapters, and totals.

# Extract the price-index rows whose series id matches a pattern and tag
# them with a label column that depends on the kind of index.
#
# Args:
#   index_data: Data frame with at least `series_id`, `footnote_codes`
#               and `year` columns.
#   condition:  Regular expression matched against the trimmed `series_id`.
#   index_type: "hs02" adds `hs02` (last two characters of `series_id`);
#               "hts_chapters" adds `hts_chapters` (the trailing run of
#               I/V/X characters); "total" adds `index_total` holding the
#               string "total"; any other value adds no column.
#
# Returns:
#   The matching rows from 2024 onwards, without `footnote_codes`.
find_index <- function(index_data, condition, index_type) {
  # Trim first so the anchored patterns see the bare series id.
  matched <- index_data %>%
    mutate(series_id = str_trim(series_id)) %>%
    filter(grepl(condition, series_id))

  labelled <- if (index_type == "hs02") {
    mutate(matched, hs02 = str_sub(series_id, -2, -1))
  } else if (index_type == "hts_chapters") {
    mutate(matched, hts_chapters = str_extract(series_id, "[IVX]+$"))
  } else if (index_type %in% c("total")) {
    mutate(matched, index_total = index_type)
  } else {
    matched
  }

  labelled %>%
    select(-footnote_codes) %>%
    filter(year >= 2024)
}
# Series ids ending in a two-digit code (EIUIP## / EIUID##).
hs2_im_index <- find_index(
  index_data = price_index,
  condition = "^EIUIP[0-9]{2}$",
  index_type = "hs02"
)
hs2_ex_index <- find_index(
  index_data = price_index,
  condition = "^EIUID[0-9]{2}$",
  index_type = "hs02"
)

# Series ids ending in a Roman-numeral suffix (EIUIP<roman> / EIUID<roman>).
hs_chapters_im_index <- find_index(
  index_data = price_index,
  condition = "^EIUIP[IVX]+$",
  index_type = "hts_chapters"
)
hs_chapters_ex_index <- find_index(
  index_data = price_index,
  condition = "^EIUID[IVX]+$",
  index_type = "hts_chapters"
)

# Single total series (EIUIR and EIUIQ).
total_im_index <- find_index(
  index_data = price_index,
  condition = "^EIUIR$",
  index_type = "total"
)
total_ex_index <- find_index(
  index_data = price_index,
  condition = "^EIUIQ$",
  index_type = "total"
)



# Reshape a filtered price-index table to one row per month and series,
# with the 2024 and 2025 index values in separate columns.
#
# Args:
#   index_data:  Data frame with `year`, `period` (e.g. "M01"), `series_id`,
#                `value` and the label column named by `varname_col`.
#   ei_series:   Not used in the computation; kept so existing calls keep
#                working. (The earlier version only trimmed its `series_id`
#                into a local copy that was never read.)
#   varname_col: Name of the label column to carry through; also the prefix
#                of the renamed value columns.
#
# Returns:
#   A tibble with `month`, `series_id`, the label column, and
#   `<varname_col>_2024` / `<varname_col>_2025`, restricted to January
#   through July. Columns for any other year keep the `index_<year>` name.
process_index <- function(index_data, ei_series, varname_col) {
  # The pipeline result is the function's value. The previous version ended
  # the pipeline with a dangling `%>%` in front of `return(index_data)`,
  # which pipes the result into `return()` as an extra argument.
  index_data %>%
    mutate(
      # "M01" -> 1, "M12" -> 12
      month_num = as.integer(str_remove(period, "M")),
      # NOTE(review): these labels follow the session's time locale, while
      # the filter below compares against the English `month.name` --
      # confirm the script runs under an English locale.
      month = month(month_num, label = TRUE, abbr = FALSE)
    ) %>%
    select(year, month, series_id, value, all_of(varname_col)) %>%
    pivot_wider(
      names_from = year,
      names_prefix = "index_",
      values_from = value
    ) %>%
    # Errors if either the 2024 or the 2025 column is absent.
    rename(
      "{varname_col}_2024" := index_2024,
      "{varname_col}_2025" := index_2025
    ) %>%
    # month.name[1:7] is January through July.
    filter(month %in% month.name[1:7])
}

# HS2 price indexes (import and export)
hs02_im_index_processed <- process_index(
  index_data = hs2_im_index,
  ei_series = ei_series,
  varname_col = "hs02"
)
hs02_ex_index_processed <- process_index(
  index_data = hs2_ex_index,
  ei_series = ei_series,
  varname_col = "hs02"
)

# HTS chapter price indexes (import and export)
hts_im_index_processed <- process_index(
  index_data = hs_chapters_im_index,
  ei_series = ei_series,
  varname_col = "hts_chapters"
)
hts_ex_index_processed <- process_index(
  index_data = hs_chapters_ex_index,
  ei_series = ei_series,
  varname_col = "hts_chapters"
)

# Total price indexes (import and export)
total_im_index_processed <- process_index(
  index_data = total_im_index,
  ei_series = ei_series,
  varname_col = "index_total"
)
total_ex_index_processed <- process_index(
  index_data = total_ex_index,
  ei_series = ei_series,
  varname_col = "index_total"
)

